* generate analysis dataset
********************************************************************************
* prelims
********************************************************************************
{
clear all
set more off
cap log close

}
********************************************************************************
* calculate perceived gini
********************************************************************************
{
* load data
use "$data/survey_netquest.dta", clear

* keep relevant variables
keep RandomID IncTilesPerc_p10 IncTilesPerc_p30 IncTilesPerc_p50 IncTilesPerc_p70 IncTilesPerc_p90 IncTilesPerc_p99

* add every percentile
forval i=1(1)99 {

	cap g IncTilesPerc_p`i' = .

}

* reshape long
reshape long IncTilesPerc_p, i(RandomID) j(p)

* rename 
rename IncTilesPerc_p PercInc

* replace first percentile
replace PercInc = 0 if p==1

* interpolate
bys RandomID: ipolate PercInc p, g(PercIncIpol)

* implied gini
levelsof RandomID, local(IDs)
g PercGini = .
sort RandomID
ineqdec0 PercIncIpol, by(RandomID)

foreach i in `IDs' {
	replace PercGini = r(gini_`i') if RandomID==`i'	
}

* label
la var PercInc "Perceived Income (actual response)"
la var PercIncIpol "Perceived Income (interpolated)"
la var PercGini "Perceived Gini"
la var p "Income percentile"

* store
compress
save "$data/temp/survey_perc_inc_dist.dta", replace

}
********************************************************************************
* add info from intermediate datasets
********************************************************************************
{
* load survey data
use "$data/survey_netquest.dta", clear	

* add address information
merge m:1 StreetType StreetName StreetNum ZIPCode using "$data/int/address_match.dta"
assert _merge==3 
drop _merge

* add plot characteristics
#d;
	local varsplot CPRO CMUN CDIS CSEC BARRI ZIPCode PropYear YearConst QualConst MaxQual MedQual MinQual
	NFloors NProps MaxAptSize MedAptSize MinAptSize PropSqmPLOT_mean 
	PropSqmPLOT_med PropSqmPLOT_sd ShrSqmVPLOT ShrSqmGHPLOT ShrSqmGRPLOT 
	ShrSqmGCPLOT ShrSqmCPLOT ShrSqmOPLOT
	
	;
#d cr
	
* merge
merge m:m PlotCode using "$data/int/catastro_plot_chars.dta", keepusing(`varsplot')
drop if _merge==2
drop _merge

* add Lat/Lon
g year = 2019
merge m:1 PlotCode year using "$data/int/catastro_census_neigh_08_900.dta", keepusing(CUSEC Lat Lon)
drop if _merge == 2
drop _merge
drop year

* distance to new buildings
#d;
	local evvars Dist*_2019 PlotDist*_2019 
	Dist*_2018 PlotDist*_2018 
	Dist*_2017 PlotDist*_2017 
	Dist*_2016 PlotDist*_2016 
	Dist*_2015 PlotDist*_2015 
	Dist*_2014 PlotDist*_2014 
	Dist*_2013 PlotDist*_2013 
	Dist*_2012 PlotDist*_2012
	;
#d cr

merge m:1 PlotCode using "$data/int/Survey_Dist_NewApt.dta", keepusing(`evvars')
drop if _merge==2
drop _merge

* value of dwellings in plot	
rename PlotCode t_PlotCode

forval year = 2019(-1)2015 {

forval i = 1(1)5{

	rename PlotDist`i'_`year' PlotCode

	* add value
	merge m:m PlotCode using "$data/int/PredPriceForest_08_900.dta", keepusing(PHat`year')
	drop if _merge==2
	drop _merge

	rename PHat`year' PHat

	duplicates drop RandomID, force

	bys PlotCode: egen VNewH`year'_`i'_max = max(PHat)
	bys PlotCode: egen VNewH`year'_`i'_min = min(PHat)
	bys PlotCode: egen VNewH`year'_`i'_med = median(PHat)
	bys PlotCode: egen VNewH`year'_`i'_mean = mean(PHat)

	la var VNewH`year'_`i'_max "New Housing: Max value in `year' (`i' closest blng)"
	la var VNewH`year'_`i'_min "New Housing: Min value in `year' (`i' closest blng)"
	la var VNewH`year'_`i'_med "New Housing: Med value in `year' (`i' closest blng)"
	la var VNewH`year'_`i'_mean "New Housing: Mean value in `year' (`i' closest blng)"

	rename PlotCode PlotDist`i'_`year'
	drop PHat

	duplicates drop RandomID, force

} 
} 

* characteristics of closest new apartment
#d;
	local varsnewapt QualConst MedQual NFloors NProps MedAptSize PropSqmPLOT_mean 
	PropSqmPLOT_med ShrSqmVPLOT ShrSqmGHPLOT ShrSqmGRPLOT 
	ShrSqmGCPLOT ShrSqmCPLOT ShrSqmOPLOT
	
	;
#d cr

* rename vars describing chars of respondent plot to merge
foreach var in `varsnewapt' {
	rename `var' t_`var'
}
	
forval year = 2019(-1)2015 {
	forval i = 1(1)5{
	
	* rename variable describing closest plot	
	rename PlotDist`i'_`year' PlotCode

	* merge
	merge m:1 PlotCode using "$data/int/catastro_plot_chars.dta", keepusing(`varsnewapt')
	drop if _merge==2
	drop _merge 

	* rename
	foreach var in `varsnewapt' {
		rename `var' `var'_Ev`i'_`year'
	}

	* rename back
	rename PlotCode PlotDist`i'_`year'
	
	* quality
	g QualConstScaled_Ev`i'_`year' = (QualConst_Ev`i'_`year' * -1) + 10
	drop QualConst_Ev`i'_`year'
	
	g MedQualScaled_Ev`i'_`year' = (MedQual_Ev`i'_`year' * -1) + 10
	drop MedQual_Ev`i'_`year'

	}
} 

* rename back
foreach var in `varsnewapt' PlotCode {
	rename t_`var' `var'
}

* add plot characteristics in census tract - in 2015
g Year = 2015

#d
local AddVar 
	PropSqmCSEC_mean PropSqmCSEC_med MedYearConstCSEC MeanYearConstCSEC 
	MedNFloorsCSEC NAptsCSEC MedNAptsCSEC MedQualCSEC
	MedAptSizeCSEC AreaSqm ShrSqmVCSEC ShrSqmGCCSEC ShrSqmGHCSEC 
	ShrSqmGRCSEC ShrSqmACSEC ShrSqmCCSEC ShrSqmECSEC ShrSqmOCSEC 
	ShrSqmPCSEC ShrSqmRCSEC
	;
#d cr

* add
merge m:1 CPRO CMUN CDIS CSEC Year using "$data/int/catastro_csec_chars.dta", keepusing(`AddVar')
drop if _merge==2
drop _merge

* rename
foreach var in `AddVar' {
	rename `var' `var'_2015
}

* qual scaled in tract
foreach mom in Med {

	g `mom'QualCSECScaled_2015 = (`mom'QualCSEC_2015 * -1) + 10
	la var `mom'QualCSECScaled_2015 "(`mom') Qual Construction in CSEC (2015)"
	drop `mom'QualCSEC_2015

}

drop Year

* add plot characteristics in census tract in 2019
g Year = 2019
merge m:1 CPRO CMUN CDIS CSEC Year using "$data/int/catastro_csec_chars.dta"
drop if _merge==2
drop _merge

* qual scaled in tract
foreach mom in Med {
	
	g `mom'QualCSECScaled = (`mom'QualCSEC * -1) + 10
	la var `mom'QualCSECScaled "(`mom') Qual Construction in CSEC"
	drop `mom'QualCSEC 

}
	
merge m:1 PlotCode Year using "$data/int/IneqChng_08_900_PlotCode.dta"
drop if _merge==2
drop _merge

* LNG
foreach year in 2017 2015 {

cap drop Year
g Year = `year' 	

* relevant vars
local varsvlng VLNG100 VLNG200 VLNG350 VLNG500 Vp50100 Vp50200 Vp50350 Vp50500  

* add data 
merge m:1 PlotCode Year using "$data/int/lng_bcn.dta", keepusing(`varsvlng')
drop if _merge==2
drop _merge

foreach var in `varsvlng' {

	rename `var' `var'_`year'
	la var `var'_`year' "`var' (Base `year')"

}
}

* elections data 2015
g ElecYear = 2015
g ElecMonth = 12
merge m:1 CMUN CDIS CSEC ElecYear ElecMonth using "$data/int/elections_bcn.dta", keepusing(PctVotesLEFT)
drop if _merge==2
drop _merge

* vars
rename PctVotesLEFT PctVotesLEFT_2015
drop ElecYear ElecMonth

* LNG in 2019
cap drop Year	
g Year=2019
local varskeep Neighbors* *LNG*
merge m:1 PlotCode Year using "$data/int/lng_bcn.dta", keepusing(`varskeep')
drop if _merge==2
drop _merge
cap drop Year

* perceived gini
merge 1:m RandomID using "$data/temp/survey_perc_inc_dist.dta", keepusing(PercGini)
keep if _merge==3
drop _merge
duplicates drop RandomID, force

* atlas renta - income census tract
g Year = 2017
local varsatlas MedianPersInc MedianHHInc MeanHHSize
merge m:1 CPRO CDIS CMUN CSEC Year using  "$data/int/ine_atlas_renta_csec.dta", keepusing(`varsatlas')
drop if _merge==2
drop _merge
drop Year

* 2011 census info
* get 2011 tract
foreach var in CSEC CDIS CMUN CPRO {
	rename `var' t_`var'
}
g year = 2011
merge m:1 PlotCode year using "$data/int/catastro_census_neigh_08_900.dta", keepusing(CSEC CDIS CMUN CPRO)
drop if _merge == 2
drop _merge
drop year	
* census vars to keep
global censusvars tertiary_educ tot_house tot_pop
* merge
merge m:1 CSEC CDIS CMUN CPRO using "$data/int/census2011_csec.dta", keepusing($censusvars)
drop if _merge==2
drop _merge

* rename back - 2016 tract
foreach var in CSEC CDIS CMUN CPRO {
	drop `var'
	rename t_`var' `var'
}

* registry covariates in 2015
g gender = "as"
g year = 2015
* merge
merge m:m CPRO CMUN CDIS CSEC gender year using "$data/int/reg_csec_2015.dta", keepusing(pop foreign)
drop if _merge==2
drop _merge
drop gender year
* rename
rename pop pop_2015
rename foreign foreign_2015

* registry - csec
local regkeep pop esp_nationality esp same_reg dif_reg foreign foreign_nnt 
g gender = "as"
g year = 2019
merge m:m CPRO CMUN CDIS CSEC gender year using "$data/int/reg_csec_2019.dta", keepusing(`regkeep')
drop if _merge==2
drop _merge
drop gender year
* not bcn
drop if PropZIP > 8042

* predicted housing value
merge m:m PlotCode using "$data/int/PredPriceForest_08_900.dta", keepusing(PHat2019 PHat2015)
drop if _merge==2
drop _merge
rename PHat2019 PHat
duplicates drop RandomID, force

* median value
bys PlotCode: egen MedVHouse = median(PHat)
la var MedVHouse "Med Value of House (respondent)"

* year 2015
bys PlotCode: egen MedVHouse2015 = median(PHat2015)
la var MedVHouse2015 "Med Value of House in 2015 (respondent)"
drop PHat2015

* remove addresses - just keep PlotCode
drop Prop*StName Prop*StNum Prop*StType Street* ZIPCode* 

* if address matched, then consider address as correct
g GuessCorrZIP = 0 if WrongZIPNET == 1
replace GuessCorrZIP = 1 if GuessCorrZIP==0 & PlotCode != "" & fakecurrent != 1
replace problematic=0 if WrongZIPNET==1 & GuessCorrZIP==1 & WrongGenderNET!=1 & WrongAgeNET!=1
drop fakecurrent

}
********************************************************************************
* additional vars
********************************************************************************
{

* ideology
g Left = (Ideology < 5)
la var Left "Left-wing"
la def Left 1 "Left-wing" 0 "Right-wing"
la val Left Left
g Right = (Ideology > 5)
la var Right "Right-wing"

* voted for left wing party
g LeftWParty = (Party=="CUP"|Party=="ERC"|Party=="PODEMOS"|Party=="PSOE")
replace LeftWParty = . if Party == "NS/NC" 
replace LeftWParty = . if Voted==0

* perceived decile
g ActDecil = floor(IncRankAct/10)
g PercDecil = floor(IncRankPerc/10)

* perceived income rank from 0-1
replace IncRankPerc = IncRankPerc/100
replace InkRankMisp = InkRankMisp/100
replace IncRankAct = IncRankAct/100

* perceived income ratios
g Perc_ratio9050 = IncTilesPerc_p90 / IncTilesPerc_p50
g Perc_ratio9010 = IncTilesPerc_p90 / IncTilesPerc_p10
la var Perc_ratio9010 "Perceived 90/10 Income Ratio"
la var Perc_ratio9050 "Perceived 90/50 Income Ratio"

* perceived gini (Gimpelson and Treisman 2018)
g PercGiniPyr = .
replace PercGiniPyr = 0.42 if regexm(PyramidQ,"Tipo A")==1
replace PercGiniPyr = 0.35 if regexm(PyramidQ,"Tipo B")==1
replace PercGiniPyr = 0.30 if regexm(PyramidQ,"Tipo C")==1
replace PercGiniPyr = 0.20 if regexm(PyramidQ,"Tipo D")==1
replace PercGiniPyr = 0.21 if regexm(PyramidQ,"Tipo E")==1
la var PercGiniPyr "Perceived Gini - Pyramid"
cap drop PyramidQ

* renter/owner
g Renter = (PropType > 3)
la var Renter "Renter"
la def Renter 1 "Renter" 0 "Owner"
la val Renter Renter

* district FE 
egen CDISFE = group(CMUN CDIS)
la val CDISFE BCNDist

* log HH income
g lnIncHH = ln(IncHH + 1)
la var lnIncHH "HH Income"

* label
la var University "University"
la var IncRankAct "Actual Income Rank"
la var Luck "POUM: Luck more important than effort"
la var TrustPol "Trust in Politicians"
la var PropYears "Years in dwelling"
la var MedianHHInc "Median HH Income in Tract (INE)"
la var MedianPersInc "Median Personal Income in Tract (INE)"

* drop vars
drop ticket

replace PercIncRank10yrs = PercIncRank10yrs / 100
replace PercIncRankKid = PercIncRankKid / 100

* shrs in tract and dis
foreach var in foreign {

	g shr`var' = `var'/pop

}

* richest/poorest neighborhood
g Sarria=(CDISFE==5)
g NouBarris = (CDISFE==8)
la def Sarria 1 "Sarrià" 0 "No Sarrià"
la def NouBarris 1 "Nou Barris" 0 "No Nou Barris"
la val Sarria Sarria
la val NouBarris NouBarris

* population density
g PopDensity = pop / AreaSqm 
g PopDensity_2015 = pop_2015 / AreaSqm_2015 
la var PopDensity "Population Density"
la var PopDensity_2015 "Population Density"

global censusvars tertiary_educ tot_house tot_pop
foreach var in $censusvars {
	replace `var' = 0 if `var' == .
}

* logs
local varslog foreign foreign_nnt MedianHHInc MaxAptSize pop $censusvars 

foreach var in `varslog' {
	cap g ln`var' = ln(`var' + 1)
}

* poorest districts
g BelowMDis = (CDISFE==8|CDISFE==7|CDISFE==9|CDISFE==3|CDISFE==10)
la def BelowMDis 1 "Poor District" 0 "Rich District"
la val BelowMDis BelowMDis

* log income value ratios
foreach var in Perc_ratio9050 Perc_ratio9010 {
	g ln`var' = ln(`var')
}
la var lnPerc_ratio9010 "log Perceived 90/10 Income Ratio"
la var lnPerc_ratio9050 "log Perceived 90/50 Income Ratio"


* Party Votes
foreach party in PODEMOS CUP ERC PSOE JXCAT PP CS VOX {
	cap g `party' = (Party == "`party'")
	replace `party' = . if (Party == "" | Party == "NS/NC")

}

* distance to closest metro station
g SEC_CENS = CSEC
g DISTRICTE = CDIS

merge m:m SEC_CENS DISTRICTE using "$data/int/distance_stations_tract.dta", keepusing(METRO*)
drop if _merge == 2
drop _merge
duplicates drop RandomID, force

qui lookfor METRO
g DistMetro = .
la var DistMetro "Distance to Closest Metro Station"
foreach var in `r(varlist)' {

replace DistMetro = `var' if `var' < DistMetro
drop `var'

}

* distance to closest school
merge m:m SEC_CENS DISTRICTE using "$data/int/distance_schools_tract.dta", keepusing(SCL*)
drop if _merge == 2
drop _merge
duplicates drop RandomID, force

g DistSchool = .
la var DistSchool "Distance to Closest School"
qui lookfor SCL
foreach var in `r(varlist)' {

replace DistSchool = `var' if `var' < DistSchool
drop `var'

}

* distance to closest park
merge m:m SEC_CENS DISTRICTE using "$data/int/distance_parks_tract.dta", keepusing(PRK*)
drop if _merge == 2
drop _merge
duplicates drop RandomID, force

g DistPark = .
la var DistPark "Distance to Closest Park"
qui lookfor PRK
foreach var in `r(varlist)' {

replace DistPark = `var' if `var' < DistPark
drop `var'

}

* distance to closest hospital
merge m:m SEC_CENS DISTRICTE using "$data/int/distance_hospitals_tract.dta", keepusing(HSP*)
drop if _merge == 2
drop _merge
duplicates drop RandomID, force

g DistHosp = .
la var DistHosp "Distance to Closest Hospital"
qui lookfor HSP
foreach var in `r(varlist)' {

replace DistHosp = `var' if `var' < DistHosp
drop `var'

}

drop SEC_CENS DISTRICTE

* store
compress
save "$data/analysis_dataset.dta", replace

* erase temp files
cap erase "$data/temp/survey_perc_inc_dist.dta"

}
********************************************************************************
* closing
********************************************************************************
{
	
cap log close
clear

}
